---
title: mahoudata
keywords: fastai
sidebar: home_sidebar
summary: "API details."
---

class PreProcess[source]

PreProcess(ctx)

Preprocessing class that groups all data preparation functions.

PreProcess.clean_duplicates[source]

PreProcess.clean_duplicates()

Clean duplicates method

PreProcess.cols_munging[source]

PreProcess.cols_munging(dataframe, fillna=True)

Columns preparation method

PreProcess.scale_cols[source]

PreProcess.scale_cols(dataframe)

Min-max scaler for numeric columns.

class RecommenderStrategyFactory[source]

RecommenderStrategyFactory(ctx)

Strategy factory

RecommenderStrategyFactory.createStrategy[source]

RecommenderStrategyFactory.createStrategy(strategy)

class NumericStrategy[source]

NumericStrategy(ctx)

Numeric based recommender system

NumericStrategy.model_builder[source]

NumericStrategy.model_builder(dataframe)

NumericStrategy.exec_strategy[source]

NumericStrategy.exec_strategy(dataframe, distance='cosine')

Explore Data

# Load the raw datathon dataset and render a profiling report inline in the
# notebook to get a first overview of the data.
df = pd.read_csv("./data/dataset-datathon.csv")
profile = ProfileReport(
    df,
    title='Pandas Profiling Report',
    html={'style': {'full_width': True}},
)
profile.to_notebook_iframe()

Remove duplicates

According to the profile report, 60% of the rows are duplicates, so we remove them.

# Drop rows that are exact duplicates across all columns.
# To ignore a column when comparing rows, pass a subset instead, e.g.
# subset=df.columns.difference(['vajilla']).
df_clean = df.drop_duplicates()

# Profile the de-duplicated frame (not the raw `df`) so the report reflects
# the effect of the cleanup above.
profile = ProfileReport(df_clean, title='Pandas Profiling Report', html={'style':{'full_width':True}})
profile.to_notebook_iframe()

Run Recommender

# Context handed to the factory: the numeric columns listed for the strategy.
context = {
    'numeric_cols': [
        'lupulo_afrutado_citrico',
        'lupulo_floral_herbal',
        'amargor',
        'color',
        'maltoso',
        'licoroso',
        'afrutado',
        'especias',
        'acidez',
    ],
}

# Obtain the numeric strategy through the factory.
f = RecommenderStrategyFactory(context)
strategy = f.createStrategy('numeric')

# Build the data model from the raw frame, then run the strategy with its
# default distance ('cosine').
datamodel = strategy.model_builder(df)
recommender_df = strategy.exec_strategy(datamodel)

recommender_df
0 1 2 3 4 5 6 7 8 9 ... 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197
0 0.000000 0.000000 0.042737 0.014204 0.019602 0.003507 0.003507 0.046649 0.079535 0.019307 ... 0.033039 0.033039 0.033039 0.033039 0.033039 0.033039 0.033039 0.040066 0.023939 0.023939
1 0.000000 0.000000 0.042737 0.014204 0.019602 0.003507 0.003507 0.046649 0.079535 0.019307 ... 0.033039 0.033039 0.033039 0.033039 0.033039 0.033039 0.033039 0.040066 0.023939 0.023939
2 0.042737 0.042737 0.000000 0.027731 0.111271 0.045083 0.045083 0.139327 0.149810 0.109016 ... 0.009456 0.009456 0.009456 0.009456 0.009456 0.009456 0.009456 0.082896 0.118115 0.118115
3 0.014204 0.014204 0.027731 0.000000 0.042773 0.014581 0.014581 0.072928 0.073416 0.040805 ... 0.016657 0.016657 0.016657 0.016657 0.016657 0.016657 0.016657 0.033592 0.050960 0.050960
4 0.019602 0.019602 0.111271 0.042773 0.000000 0.016331 0.016331 0.029392 0.063220 0.008608 ... 0.089179 0.089179 0.089179 0.089179 0.089179 0.089179 0.089179 0.053573 0.012559 0.012559
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1193 0.033039 0.033039 0.009456 0.016657 0.089179 0.036679 0.036679 0.116344 0.099990 0.080648 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.061789 0.093179 0.093179
1194 0.033039 0.033039 0.009456 0.016657 0.089179 0.036679 0.036679 0.116344 0.099990 0.080648 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.061789 0.093179 0.093179
1195 0.040066 0.040066 0.082896 0.033592 0.053573 0.043635 0.043635 0.026478 0.098794 0.036349 ... 0.061789 0.061789 0.061789 0.061789 0.061789 0.061789 0.061789 0.000000 0.038070 0.038070
1196 0.023939 0.023939 0.118115 0.050960 0.012559 0.028685 0.028685 0.019168 0.095705 0.010767 ... 0.093179 0.093179 0.093179 0.093179 0.093179 0.093179 0.093179 0.038070 0.000000 0.000000
1197 0.023939 0.023939 0.118115 0.050960 0.012559 0.028685 0.028685 0.019168 0.095705 0.010767 ... 0.093179 0.093179 0.093179 0.093179 0.093179 0.093179 0.093179 0.038070 0.000000 0.000000

1198 rows × 1198 columns

# Take column 1 of the strategy output and sort it ascending so the smallest
# values come first; NaN entries are placed at the end by sort_values.
recommendations_example = (
    recommender_df[1]
    .sort_values(ascending=True)
    .to_frame()
)
recommendations_example
1
0 0.000000
512 0.000000
511 0.000000
510 0.000000
509 0.000000
... ...
1060 0.647605
144 0.660262
195 NaN
197 NaN
334 NaN

1198 rows × 1 columns